{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "e8a28b37",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>date</th>\n",
       "      <th>rank</th>\n",
       "      <th>title</th>\n",
       "      <th>author</th>\n",
       "      <th>url</th>\n",
       "      <th>tags</th>\n",
       "      <th>likes</th>\n",
       "      <th>favorites</th>\n",
       "      <th>views</th>\n",
       "      <th>comment</th>\n",
       "      <th>create_time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>120353348</td>\n",
       "      <td>20240709</td>\n",
       "      <td>1</td>\n",
       "      <td>Sa-fu</td>\n",
       "      <td>穂波ママ32👨‍👩‍👧</td>\n",
       "      <td>https://www.pixiv.net/artworks/120353348</td>\n",
       "      <td>['プロセカ', '穂波ママ', '朝比奈まふゆ', '存在しろ記憶', '洗脳されたコメ欄...</td>\n",
       "      <td>5993</td>\n",
       "      <td>7013</td>\n",
       "      <td>59547</td>\n",
       "      <td>74</td>\n",
       "      <td>2024-07-08T08:24:00+00:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>120339739</td>\n",
       "      <td>20240709</td>\n",
       "      <td>2</td>\n",
       "      <td>Sheya</td>\n",
       "      <td>🐠🐠🐠</td>\n",
       "      <td>https://www.pixiv.net/artworks/120339739</td>\n",
       "      <td>['原神', '女の子', '少女', '水中', '創作', 'げんし神', '白髪', ...</td>\n",
       "      <td>6076</td>\n",
       "      <td>8041</td>\n",
       "      <td>34171</td>\n",
       "      <td>19</td>\n",
       "      <td>2024-07-07T16:10:00+00:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>120337725</td>\n",
       "      <td>20240709</td>\n",
       "      <td>3</td>\n",
       "      <td>ぎばちゃん</td>\n",
       "      <td>実家のような安心感の無さに心細くなる奥さん</td>\n",
       "      <td>https://www.pixiv.net/artworks/120337725</td>\n",
       "      <td>['漫画', 'オリジナル', 'お茶目な奥さんとの日常茶番事', '会話のデッドボール',...</td>\n",
       "      <td>4630</td>\n",
       "      <td>6264</td>\n",
       "      <td>318068</td>\n",
       "      <td>270</td>\n",
       "      <td>2024-07-07T15:08:00+00:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>120338404</td>\n",
       "      <td>20240709</td>\n",
       "      <td>4</td>\n",
       "      <td>夜風さらら</td>\n",
       "      <td>狐の窓～ゆるっと怪異～</td>\n",
       "      <td>https://www.pixiv.net/artworks/120338404</td>\n",
       "      <td>['うごイラ', '狐の窓', '書籍化', '絵も描けて動画も作れたらもう最強', 'ｷｪ...</td>\n",
       "      <td>3936</td>\n",
       "      <td>4586</td>\n",
       "      <td>55017</td>\n",
       "      <td>62</td>\n",
       "      <td>2024-07-07T15:26:00+00:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>120339145</td>\n",
       "      <td>20240709</td>\n",
       "      <td>5</td>\n",
       "      <td>みすみ</td>\n",
       "      <td>無題</td>\n",
       "      <td>https://www.pixiv.net/artworks/120339145</td>\n",
       "      <td>['バーチャルYouTuber', 'ぶいすぽっ!', '花芽すみれ', 'バーチャルYou...</td>\n",
       "      <td>3876</td>\n",
       "      <td>4963</td>\n",
       "      <td>23077</td>\n",
       "      <td>13</td>\n",
       "      <td>2024-07-07T15:48:00+00:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1042</th>\n",
       "      <td>119786759</td>\n",
       "      <td>20240619</td>\n",
       "      <td>46</td>\n",
       "      <td>なまず</td>\n",
       "      <td>グランドセフト年齢を詐称してるVtuber</td>\n",
       "      <td>https://www.pixiv.net/artworks/119786759</td>\n",
       "      <td>['漫画', 'バーチャルYouTuber', 'VTuber', '星空バアド', '免許...</td>\n",
       "      <td>1867</td>\n",
       "      <td>1961</td>\n",
       "      <td>82247</td>\n",
       "      <td>70</td>\n",
       "      <td>2024-06-19T12:35:00+00:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1043</th>\n",
       "      <td>119737781</td>\n",
       "      <td>20240619</td>\n",
       "      <td>47</td>\n",
       "      <td>上山道郎</td>\n",
       "      <td>３０年目のドロンチェンジャー</td>\n",
       "      <td>https://www.pixiv.net/artworks/119737781</td>\n",
       "      <td>['ナリタトップロード(ウマ娘)', '土田大', '忍者戦隊カクレンジャー', 'ラクガキ...</td>\n",
       "      <td>887</td>\n",
       "      <td>895</td>\n",
       "      <td>23367</td>\n",
       "      <td>45</td>\n",
       "      <td>2024-06-17T15:40:00+00:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1044</th>\n",
       "      <td>119751559</td>\n",
       "      <td>20240619</td>\n",
       "      <td>48</td>\n",
       "      <td>葵燐-kirin-</td>\n",
       "      <td>【新刊サンプル】空回りメリーゴーランド</td>\n",
       "      <td>https://www.pixiv.net/artworks/119751559</td>\n",
       "      <td>['ポケモンSV', 'ペパアオ', 'ペパー(トレーナー)', 'アオイ(ポケモンSV)'...</td>\n",
       "      <td>1363</td>\n",
       "      <td>1312</td>\n",
       "      <td>51909</td>\n",
       "      <td>16</td>\n",
       "      <td>2024-06-18T07:41:00+00:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1045</th>\n",
       "      <td>119753617</td>\n",
       "      <td>20240619</td>\n",
       "      <td>49</td>\n",
       "      <td>一akeng</td>\n",
       "      <td>无题</td>\n",
       "      <td>https://www.pixiv.net/artworks/119753617</td>\n",
       "      <td>['原神', 'Genshin', '芙宁娜', 'フリーナ(原神)', '原神1000us...</td>\n",
       "      <td>2386</td>\n",
       "      <td>3559</td>\n",
       "      <td>13598</td>\n",
       "      <td>9</td>\n",
       "      <td>2024-06-18T09:12:00+00:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1046</th>\n",
       "      <td>119743618</td>\n",
       "      <td>20240619</td>\n",
       "      <td>50</td>\n",
       "      <td>MILKTEA</td>\n",
       "      <td>歯磨き</td>\n",
       "      <td>https://www.pixiv.net/artworks/119743618</td>\n",
       "      <td>['百合', '夜のクラゲは泳げない', '山ノ内花音', '光月まひる', 'まひかの']</td>\n",
       "      <td>850</td>\n",
       "      <td>1268</td>\n",
       "      <td>7166</td>\n",
       "      <td>3</td>\n",
       "      <td>2024-06-17T22:30:00+00:00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1047 rows × 12 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             id      date  rank      title                 author  \\\n",
       "0     120353348  20240709     1      Sa-fu            穂波ママ32👨‍👩‍👧   \n",
       "1     120339739  20240709     2      Sheya                    🐠🐠🐠   \n",
       "2     120337725  20240709     3      ぎばちゃん  実家のような安心感の無さに心細くなる奥さん   \n",
       "3     120338404  20240709     4      夜風さらら            狐の窓～ゆるっと怪異～   \n",
       "4     120339145  20240709     5        みすみ                     無題   \n",
       "...         ...       ...   ...        ...                    ...   \n",
       "1042  119786759  20240619    46        なまず  グランドセフト年齢を詐称してるVtuber   \n",
       "1043  119737781  20240619    47       上山道郎         ３０年目のドロンチェンジャー   \n",
       "1044  119751559  20240619    48  葵燐-kirin-    【新刊サンプル】空回りメリーゴーランド   \n",
       "1045  119753617  20240619    49     一akeng                     无题   \n",
       "1046  119743618  20240619    50    MILKTEA                    歯磨き   \n",
       "\n",
       "                                           url  \\\n",
       "0     https://www.pixiv.net/artworks/120353348   \n",
       "1     https://www.pixiv.net/artworks/120339739   \n",
       "2     https://www.pixiv.net/artworks/120337725   \n",
       "3     https://www.pixiv.net/artworks/120338404   \n",
       "4     https://www.pixiv.net/artworks/120339145   \n",
       "...                                        ...   \n",
       "1042  https://www.pixiv.net/artworks/119786759   \n",
       "1043  https://www.pixiv.net/artworks/119737781   \n",
       "1044  https://www.pixiv.net/artworks/119751559   \n",
       "1045  https://www.pixiv.net/artworks/119753617   \n",
       "1046  https://www.pixiv.net/artworks/119743618   \n",
       "\n",
       "                                                   tags  likes  favorites  \\\n",
       "0     ['プロセカ', '穂波ママ', '朝比奈まふゆ', '存在しろ記憶', '洗脳されたコメ欄...   5993       7013   \n",
       "1     ['原神', '女の子', '少女', '水中', '創作', 'げんし神', '白髪', ...   6076       8041   \n",
       "2     ['漫画', 'オリジナル', 'お茶目な奥さんとの日常茶番事', '会話のデッドボール',...   4630       6264   \n",
       "3     ['うごイラ', '狐の窓', '書籍化', '絵も描けて動画も作れたらもう最強', 'ｷｪ...   3936       4586   \n",
       "4     ['バーチャルYouTuber', 'ぶいすぽっ!', '花芽すみれ', 'バーチャルYou...   3876       4963   \n",
       "...                                                 ...    ...        ...   \n",
       "1042  ['漫画', 'バーチャルYouTuber', 'VTuber', '星空バアド', '免許...   1867       1961   \n",
       "1043  ['ナリタトップロード(ウマ娘)', '土田大', '忍者戦隊カクレンジャー', 'ラクガキ...    887        895   \n",
       "1044  ['ポケモンSV', 'ペパアオ', 'ペパー(トレーナー)', 'アオイ(ポケモンSV)'...   1363       1312   \n",
       "1045  ['原神', 'Genshin', '芙宁娜', 'フリーナ(原神)', '原神1000us...   2386       3559   \n",
       "1046     ['百合', '夜のクラゲは泳げない', '山ノ内花音', '光月まひる', 'まひかの']    850       1268   \n",
       "\n",
       "       views  comment                create_time  \n",
       "0      59547       74  2024-07-08T08:24:00+00:00  \n",
       "1      34171       19  2024-07-07T16:10:00+00:00  \n",
       "2     318068      270  2024-07-07T15:08:00+00:00  \n",
       "3      55017       62  2024-07-07T15:26:00+00:00  \n",
       "4      23077       13  2024-07-07T15:48:00+00:00  \n",
       "...      ...      ...                        ...  \n",
       "1042   82247       70  2024-06-19T12:35:00+00:00  \n",
       "1043   23367       45  2024-06-17T15:40:00+00:00  \n",
       "1044   51909       16  2024-06-18T07:41:00+00:00  \n",
       "1045   13598        9  2024-06-18T09:12:00+00:00  \n",
       "1046    7166        3  2024-06-17T22:30:00+00:00  \n",
       "\n",
       "[1047 rows x 12 columns]"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "df = pd.read_csv(\"./spider/data/data.csv\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e60be6d7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "id             False\n",
       "date           False\n",
       "rank           False\n",
       "title           True\n",
       "author         False\n",
       "url            False\n",
       "tags           False\n",
       "likes          False\n",
       "favorites      False\n",
       "views          False\n",
       "comment        False\n",
       "create_time    False\n",
       "rank_score     False\n",
       "dtype: bool"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#处理空值\n",
    "df.isnull().any()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "d5e4a5b7",
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"title\"] = df[\"title\"].fillna(\"无题\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "5c4eb394",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>date</th>\n",
       "      <th>rank</th>\n",
       "      <th>likes</th>\n",
       "      <th>favorites</th>\n",
       "      <th>views</th>\n",
       "      <th>comment</th>\n",
       "      <th>rank_score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>1.047000e+03</td>\n",
       "      <td>1.047000e+03</td>\n",
       "      <td>1047.000000</td>\n",
       "      <td>1047.000000</td>\n",
       "      <td>1047.000000</td>\n",
       "      <td>1047.000000</td>\n",
       "      <td>1047.000000</td>\n",
       "      <td>1047.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>1.200556e+08</td>\n",
       "      <td>2.024066e+07</td>\n",
       "      <td>25.429799</td>\n",
       "      <td>3523.113658</td>\n",
       "      <td>4578.590258</td>\n",
       "      <td>57205.036294</td>\n",
       "      <td>41.517670</td>\n",
       "      <td>75.570201</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>1.826872e+05</td>\n",
       "      <td>3.998541e+01</td>\n",
       "      <td>14.398607</td>\n",
       "      <td>3676.490164</td>\n",
       "      <td>4773.152768</td>\n",
       "      <td>75446.751889</td>\n",
       "      <td>61.102048</td>\n",
       "      <td>14.398607</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.197361e+08</td>\n",
       "      <td>2.024062e+07</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>307.000000</td>\n",
       "      <td>557.000000</td>\n",
       "      <td>2060.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>51.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.199006e+08</td>\n",
       "      <td>2.024062e+07</td>\n",
       "      <td>13.000000</td>\n",
       "      <td>1294.500000</td>\n",
       "      <td>1621.000000</td>\n",
       "      <td>16477.000000</td>\n",
       "      <td>9.000000</td>\n",
       "      <td>63.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>1.200609e+08</td>\n",
       "      <td>2.024063e+07</td>\n",
       "      <td>25.000000</td>\n",
       "      <td>2225.000000</td>\n",
       "      <td>2842.000000</td>\n",
       "      <td>32550.000000</td>\n",
       "      <td>18.000000</td>\n",
       "      <td>76.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>1.202162e+08</td>\n",
       "      <td>2.024070e+07</td>\n",
       "      <td>38.000000</td>\n",
       "      <td>4397.000000</td>\n",
       "      <td>5858.000000</td>\n",
       "      <td>66335.000000</td>\n",
       "      <td>42.000000</td>\n",
       "      <td>88.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>1.203853e+08</td>\n",
       "      <td>2.024071e+07</td>\n",
       "      <td>50.000000</td>\n",
       "      <td>28304.000000</td>\n",
       "      <td>29315.000000</td>\n",
       "      <td>460106.000000</td>\n",
       "      <td>525.000000</td>\n",
       "      <td>100.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 id          date         rank         likes     favorites  \\\n",
       "count  1.047000e+03  1.047000e+03  1047.000000   1047.000000   1047.000000   \n",
       "mean   1.200556e+08  2.024066e+07    25.429799   3523.113658   4578.590258   \n",
       "std    1.826872e+05  3.998541e+01    14.398607   3676.490164   4773.152768   \n",
       "min    1.197361e+08  2.024062e+07     1.000000    307.000000    557.000000   \n",
       "25%    1.199006e+08  2.024062e+07    13.000000   1294.500000   1621.000000   \n",
       "50%    1.200609e+08  2.024063e+07    25.000000   2225.000000   2842.000000   \n",
       "75%    1.202162e+08  2.024070e+07    38.000000   4397.000000   5858.000000   \n",
       "max    1.203853e+08  2.024071e+07    50.000000  28304.000000  29315.000000   \n",
       "\n",
       "               views      comment   rank_score  \n",
       "count    1047.000000  1047.000000  1047.000000  \n",
       "mean    57205.036294    41.517670    75.570201  \n",
       "std     75446.751889    61.102048    14.398607  \n",
       "min      2060.000000     0.000000    51.000000  \n",
       "25%     16477.000000     9.000000    63.000000  \n",
       "50%     32550.000000    18.000000    76.000000  \n",
       "75%     66335.000000    42.000000    88.000000  \n",
       "max    460106.000000   525.000000   100.000000  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#检查是否有异常值\n",
    "df.describe()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
